This file contains analyses that identify the overall correlation between GPT-4’s ratings and human ratings for different psycholinguistic norms.
# Load GPT-4's iconicity ratings (one row per word: the raw text response
# plus a parsed numeric rating) and the human iconicity norms, and check
# row counts before merging.
df_gpt = read_csv("../../data/processed/iconicity/iconicity_gpt-4.csv")
## Rows: 14772 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, gpt-4_response
## dbl (1): gpt4_rating
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_gpt)
## [1] 14772
# Human norms: per-word rating count, proportion known, mean rating, and SD.
df_human = read_csv("../../data/raw/iconicity/iconicity.csv")
## Rows: 14776 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): word
## dbl (5): n_ratings, n, prop_known, rating, rating_sd
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_human)
## [1] 14776
# Merge human and GPT-4 ratings on the word form. The join key is given
# explicitly (the original relied on a natural join, which would silently
# pick up any additional shared columns if either file's schema changed).
# 4 human-normed words have no GPT-4 rating, leaving 14772 rows.
df_merged = df_human %>%
  inner_join(df_gpt, by = "word")
nrow(df_merged)
## [1] 14772
### How correlated?
# Pearson and Spearman correlations between the human mean iconicity rating
# and GPT-4's rating over the merged word list.
cor.test(df_merged$rating, df_merged$gpt4_rating)
##
## Pearson's product-moment correlation
##
## data: df_merged$rating and df_merged$gpt4_rating
## t = 98.953, df = 14770, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6215975 0.6409944
## sample estimates:
## cor
## 0.6313947
# Spearman is reported as well since both scales are ordinal-ish; the exact
# p-value cannot be computed because of tied ranks (hence the warning).
cor.test(df_merged$rating, df_merged$gpt4_rating, method = "spearman")
## Warning in cor.test.default(df_merged$rating, df_merged$gpt4_rating, method =
## "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_merged$rating and df_merged$gpt4_rating
## S = 2.1867e+11, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5929681
# Reference value for human agreement on iconicity (0.35) — presumably the
# inter-annotator/split-half agreement reported for this norm set; confirm
# against the source paper. Shown as a dashed line on the bar plot.
HUMAN_AGREEMENT_ICONICITY = 0.35
df_corr = df_merged %>%
summarise(r = cor(rating, gpt4_rating, method = "spearman")) %>%
mutate(dimension = "Iconicity")
df_corr %>%
ggplot(aes(x = dimension, y = r)) +
geom_bar(stat = "identity", alpha = .7) +
scale_y_continuous(limits = c(0, 1)) +
geom_hline(yintercept = HUMAN_AGREEMENT_ICONICITY, linetype = "dashed",
color = "steelblue3", size = 1) +
labs(x = "", y = "Correlation") +
theme_minimal()
# Marginal distributions of the GPT-4 and human ratings.
df_merged %>%
ggplot(aes(x = gpt4_rating)) +
geom_histogram(alpha = .5, bins = 7)
df_merged %>%
ggplot(aes(x = rating)) +
geom_histogram(alpha = .5, bins = 7)
# GPT-4 vs. human ratings with a linear fit.
df_merged %>%
ggplot(aes(x = gpt4_rating, y = rating)) +
geom_point(alpha = .6) +
geom_smooth(method = "lm") +
theme_minimal() +
labs(x = "GPT-4 Iconicity Judgment",
y = "Human Iconicity Judgment")
## `geom_smooth()` using formula 'y ~ x'
# Per-word error measures: raw difference, the difference standardized by the
# human rating SD (a z-score of GPT's deviation), and their absolute values.
df_merged = df_merged %>%
mutate(diff = gpt4_rating - rating,
z = (gpt4_rating - rating) / rating_sd,
abs_diff = abs(diff),
abs_z = abs(z))
df_merged %>%
ggplot(aes(x = abs_z)) +
geom_histogram(alpha = .5, bins = 7)
# Inspect the 20 words where GPT-4 deviates most (in SD units) from humans.
dftop20 = df_merged %>%
arrange(desc(abs_z)) %>%
head(20)
summary(dftop20$rating)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.300 1.955 5.900 4.609 6.400 6.923
summary(dftop20$gpt4_rating)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 3.00 4.00 4.05 5.00 6.00
dftop20
## # A tibble: 20 × 12
## word n_ratings n prop_k…¹ rating ratin…² gpt-4…³ gpt4_…⁴ diff z
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 oomph 13 13 1 6.92 0.277 5 5 -1.92 -6.93
## 2 hi 10 10 1 6.2 0.789 3 3 -3.2 -4.06
## 3 legit 10 10 1 1.6 0.699 4 4 2.4 3.43
## 4 lead 10 10 1 1.7 0.675 4 4 2.3 3.41
## 5 trio 10 10 1 6.3 0.675 4 4 -2.3 -3.41
## 6 gosh 10 10 1 6 0.943 3 3 -3 -3.18
## 7 chopper 10 10 1 6.6 0.516 5 5 -1.6 -3.10
## 8 powered 10 10 1 5.8 0.919 3 3 -2.8 -3.05
## 9 swish 11 11 1 6.91 0.302 6 6 -0.909 -3.02
## 10 shape 12 12 1 1.42 0.900 4 4 2.58 2.87
## 11 wiggle 10 10 1 6.9 0.316 6 6 -0.9 -2.85
## 12 popper 10 10 1 6.4 0.843 4 4 -2.4 -2.85
## 13 direful 10 12 0.833 2 1.05 5 5 3 2.85
## 14 swash 10 11 0.909 6.3 0.823 4 4 -2.3 -2.79
## 15 wring 10 11 0.909 6.4 0.516 5 5 -1.4 -2.71
## 16 heedless 11 11 1 2.27 0.647 4 4 1.73 2.67
## 17 taxation 11 11 1 4.64 1.03 2 2 -2.64 -2.57
## 18 dictate 10 10 1 4.7 0.675 3 3 -1.7 -2.52
## 19 partial 10 10 1 1.3 0.675 3 3 1.7 2.52
## 20 below 11 11 1 1.82 0.874 4 4 2.18 2.50
## # … with 2 more variables: abs_diff <dbl>, abs_z <dbl>, and abbreviated
## # variable names ¹prop_known, ²rating_sd, ³`gpt-4_response`, ⁴gpt4_rating
## # ℹ Use `colnames()` to see all variable names
# Load GPT-4's SimLex-999 similarity ratings and the human SimLex-999 norms.
# The GPT file carries the human metadata columns as well (hence 12 columns).
df_gpt = read_csv("../../data/processed/simlex/simlex_gpt-4.csv")
## Rows: 999 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): word1, word2, POS, gpt-4_response
## dbl (8): SimLex999, conc(w1), conc(w2), concQ, Assoc(USF), SimAssoc333, SD(S...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_gpt)
## [1] 999
df_human = read_csv("../../data/raw/simlex/simlex.csv")
## Rows: 999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): word1, word2, POS
## dbl (7): SimLex999, conc(w1), conc(w2), concQ, Assoc(USF), SimAssoc333, SD(S...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_human)
## [1] 999
# Merge human and GPT-4 ratings for each word pair.
# NOTE(review): the original relied on a natural join, which matched on every
# shared column — including floating-point rating columns such as SimLex999
# and conc(w1). That is fragile: any rounding difference between the two files
# would silently drop rows. The keys are made explicit here (preserving the
# original behavior); consider restricting them to the true identifiers
# (word1, word2, POS) after resolving the duplicated metadata columns.
df_merged = df_human %>%
  inner_join(df_gpt,
             by = c("word1", "word2", "POS", "SimLex999", "conc(w1)",
                    "conc(w2)", "concQ", "Assoc(USF)", "SimAssoc333",
                    "SD(SimLex)"))
nrow(df_merged)
## [1] 999
### How correlated?
# Pearson and Spearman correlations between human SimLex-999 similarity
# scores and GPT-4's similarity ratings.
cor.test(df_merged$SimLex999, df_merged$gpt4_rating)
##
## Pearson's product-moment correlation
##
## data: df_merged$SimLex999 and df_merged$gpt4_rating
## t = 53.444, df = 997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8440121 0.8761999
## sample estimates:
## cor
## 0.8609654
cor.test(df_merged$SimLex999, df_merged$gpt4_rating, method = "spearman")
## Warning in cor.test.default(df_merged$SimLex999, df_merged$gpt4_rating, : Cannot
## compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_merged$SimLex999 and df_merged$gpt4_rating
## S = 22589372, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8640562
# Reference value for human agreement on SimLex-999 (dashed line in the plot)
# — presumably the inter-annotator agreement reported for the dataset;
# confirm against the source paper.
HUMAN_AGREEMENT_SIMLEX = 0.78
df_corr = df_merged %>%
  summarise(r = cor(SimLex999, gpt4_rating, method = "spearman")) %>%
  mutate(dimension = "Similarity (SimLex999)")
df_corr %>%
  ggplot(aes(x = dimension, y = r)) +
  geom_bar(stat = "identity", alpha = .7) +
  scale_y_continuous(limits = c(0, 1)) +
  # Fixed: removed a stray duplicated comma after the yintercept argument.
  geom_hline(yintercept = HUMAN_AGREEMENT_SIMLEX, linetype = "dashed",
             color = "steelblue3", size = 1) +
  labs(x = "", y = "Correlation") +
  theme_minimal()
# GPT-4 vs. human similarity, overall and split by whether the pair is in
# the strongly-associated SimAssoc333 subset.
df_merged %>%
ggplot(aes(x = gpt4_rating, y = SimLex999)) +
geom_point() +
geom_smooth(method = "lm") +
theme_minimal() +
labs(x = "GPT-4 Similarity Judgment",
y = "Human Similarity Judgment")
## `geom_smooth()` using formula 'y ~ x'
df_merged %>%
ggplot(aes(x = gpt4_rating, y = SimLex999)) +
geom_point() +
geom_smooth(method = "lm") +
theme_minimal() +
labs(x = "GPT-4 Similarity Judgment",
y = "Human Similarity Judgment") +
facet_wrap(~SimAssoc333)
## `geom_smooth()` using formula 'y ~ x'
# Per-pair error measures (note: SimLex999 and gpt4_rating are assumed to be
# on comparable scales here — TODO confirm).
df_merged = df_merged %>%
mutate(diff = gpt4_rating - SimLex999,
abs_diff = abs(diff))
df_merged %>%
ggplot(aes(x = diff)) +
geom_histogram(alpha = .5, bins = 7)
# Absolute error by association status; pointranges show mean ± 2 SE.
df_merged %>%
ggplot(aes(x = SimAssoc333, y = abs_diff)) +
geom_jitter(alpha = .1, width = .1) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
theme_minimal() +
labs(x = "Associated (0 = no, 1 = yes)",
y = "Absolute error")
# Absolute error by part-of-speech.
df_merged %>%
ggplot(aes(y = POS, x = abs_diff)) +
geom_jitter(alpha = .1, width = .1) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
theme_minimal() +
labs(y = "Part-of-Speech",
x = "Absolute Error")
# Absolute error by concreteness quartile.
df_merged %>%
ggplot(aes(y = factor(concQ), x = abs_diff)) +
geom_jitter(alpha = .1, width = .1) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
theme_minimal() +
labs(y = "Concreteness Quartile",
x = "Absolute Error")
# Nested model comparisons: does concreteness quartile / POS explain GPT-4's
# absolute error over and above the other predictor? Note concQ is treated
# as numeric (a linear trend across quartiles), not as a factor.
m_full = lm(data = df_merged, abs_diff ~ concQ + POS)
m_conc = lm(data = df_merged, abs_diff ~ concQ)
m_pos = lm(data = df_merged, abs_diff ~ POS)
anova(m_conc, m_full)
## Analysis of Variance Table
##
## Model 1: abs_diff ~ concQ
## Model 2: abs_diff ~ concQ + POS
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 997 1005.85
## 2 995 979.99 2 25.861 13.128 2.357e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m_pos, m_full)
## Analysis of Variance Table
##
## Model 1: abs_diff ~ POS
## Model 2: abs_diff ~ concQ + POS
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 996 995.42
## 2 995 979.99 1 15.428 15.664 8.101e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m_full)
##
## Call:
## lm(formula = abs_diff ~ concQ + POS, data = df_merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3862 -0.7260 -0.2572 0.5135 5.4830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.55708 0.10676 5.218 2.20e-07 ***
## concQ 0.15017 0.03794 3.958 8.10e-05 ***
## POSN 0.20946 0.12061 1.737 0.0828 .
## POSV 0.54875 0.11556 4.749 2.35e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9924 on 995 degrees of freedom
## Multiple R-squared: 0.04055, Adjusted R-squared: 0.03765
## F-statistic: 14.02 on 3 and 995 DF, p-value: 5.919e-09
# Inspect the 20 word pairs with the largest absolute GPT-human discrepancy.
dftop20 = df_merged %>%
  arrange(desc(abs_diff)) %>%
  head(20)
# Fixed: this data frame has no `rating` column — the original call summarised
# a nonexistent column (warning: "Unknown or uninitialised column: `rating`")
# and returned NULL. The human similarity score here is `SimLex999`.
summary(dftop20$SimLex999)
summary(dftop20$gpt4_rating)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 7.00 7.00 7.15 8.00 9.00
# The top-20 list is dominated by antonym-like pairs (wife/husband,
# multiply/divide) where GPT-4 rates similarity high but humans rate it low.
dftop20
## # A tibble: 20 × 14
## word1 word2 POS SimLe…¹ conc(…² conc(…³ concQ Assoc…⁴ SimAs…⁵ SD(Si…⁶
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 wife husband N 2.3 4.13 4.11 3 8.85 1 0.84
## 2 multiply divide V 1.75 2.79 2.86 2 2.53 1 1.32
## 3 south north N 2.2 3.84 4.14 3 7.72 1 0.59
## 4 sunset sunrise N 2.47 4.54 4.69 3 4.8 1 1.12
## 5 dog cat N 1.75 4.85 4.86 4 5.13 1 1.3
## 6 groom bride N 3.17 4.54 4.63 3 8.65 1 0.68
## 7 add divide V 2.3 3 2.86 2 0.41 0 1.25
## 8 absence presence N 0.4 2.31 2.72 1 1.38 1 1.5
## 9 lady gentlem… N 3.42 4.33 3.57 3 3.29 1 1.08
## 10 go come V 2.42 3.15 2.72 2 5.75 1 1.51
## 11 dad mother N 3.55 4.29 4.6 3 0.31 0 1.44
## 12 spend save V 0.55 2.93 2.42 2 0.61 1 1.06
## 13 north west N 3.63 4.14 3.44 3 0.31 0 1.6
## 14 bottom top N 0.7 4.25 3.93 3 6.96 1 1.16
## 15 liver lung N 2.7 4.68 4.82 4 0.14 0 1.38
## 16 multiply add V 2.7 2.79 3 2 0.5 1 1.8
## 17 rice bean N 2.72 4.86 5 4 0.34 0 1.27
## 18 bee ant N 2.78 4.88 4.86 4 0.34 0 1.04
## 19 leg arm N 2.88 4.83 4.96 4 6.73 1 0.79
## 20 cow goat N 2.93 4.96 5 4 0.42 0 0.59
## # … with 4 more variables: `gpt-4_response` <chr>, gpt4_rating <dbl>,
## # diff <dbl>, abs_diff <dbl>, and abbreviated variable names ¹SimLex999,
## # ²`conc(w1)`, ³`conc(w2)`, ⁴`Assoc(USF)`, ⁵SimAssoc333, ⁶`SD(SimLex)`
## # ℹ Use `colnames()` to see all variable names
# Load GPT-4's SimVerb-3500 ratings and the human norms.
# NOTE(review): readr reports parsing issues for the GPT file — inspect with
# problems(); at least one gpt4_rating appears to be NA downstream.
df_gpt = read_csv("../../data/processed/simverb/simverb_gpt-4.csv")
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 3500 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word1, word2
## dbl (2): gpt-4_response, gpt4_rating
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_gpt)
## [1] 3500
df_human = read_csv("../../data/raw/simverb/simverb.csv")
## Rows: 3500 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): word1, word2, POS, Relation
## dbl (1): Similarity
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_human)
## [1] 3500
# Merge human and GPT-4 ratings on the verb pair. Keys are explicit so the
# join cannot silently change if either file gains shared columns.
df_merged = df_human %>%
  inner_join(df_gpt, by = c("word1", "word2"))
nrow(df_merged)
## [1] 3500
### How correlated?
# Pearson and Spearman correlations between human SimVerb similarity
# scores and GPT-4's ratings.
cor.test(df_merged$Similarity, df_merged$gpt4_rating)
##
## Pearson's product-moment correlation
##
## data: df_merged$Similarity and df_merged$gpt4_rating
## t = 78.991, df = 3498, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7882583 0.8120784
## sample estimates:
## cor
## 0.8004842
cor.test(df_merged$Similarity, df_merged$gpt4_rating, method = "spearman")
## Warning in cor.test.default(df_merged$Similarity, df_merged$gpt4_rating, :
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_merged$Similarity and df_merged$gpt4_rating
## S = 1381649701, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8066496
# Reference value for human agreement on SimVerb-3500 (dashed line) —
# presumably the inter-annotator agreement reported for the dataset; confirm.
HUMAN_AGREEMENT_SIMVERB = 0.86
df_corr = df_merged %>%
  summarise(r = cor(Similarity, gpt4_rating, method = "spearman")) %>%
  mutate(Dimension = "Similarity (SimVerb3500)")
df_corr %>%
  ggplot(aes(x = Dimension, y = r)) +
  geom_bar(stat = "identity", alpha = .7) +
  scale_y_continuous(limits = c(0, 1)) +
  # Fixed: removed a stray duplicated comma after the yintercept argument.
  geom_hline(yintercept = HUMAN_AGREEMENT_SIMVERB, linetype = "dashed",
             color = "steelblue3", size = 1) +
  labs(x = "", y = "Correlation") +
  theme_minimal()
# GPT-4 vs. human similarity, overall and faceted by semantic relation type.
df_merged %>%
ggplot(aes(x = gpt4_rating, y = Similarity)) +
geom_point() +
geom_smooth(method = "lm") +
theme_minimal() +
labs(x = "GPT-4 Similarity Judgment",
y = "Human Similarity Judgment")
## `geom_smooth()` using formula 'y ~ x'
df_merged %>%
ggplot(aes(x = gpt4_rating, y = Similarity)) +
geom_point(alpha = .5) +
geom_smooth(method = "lm") +
theme_minimal() +
labs(x = "GPT-4 Similarity Judgment",
y = "Human Similarity Judgment") +
facet_wrap(~Relation)
## `geom_smooth()` using formula 'y ~ x'
# Per-pair error measures.
df_merged = df_merged %>%
mutate(diff = gpt4_rating - Similarity,
abs_diff = abs(diff))
df_merged %>%
ggplot(aes(x = diff)) +
geom_histogram(alpha = .5, bins = 7)
# Absolute error by relation type, ordered by mean error;
# pointranges show mean ± 2 SE.
df_merged %>%
ggplot(aes(y = reorder(Relation, abs_diff), x = abs_diff)) +
geom_jitter(alpha = .1, width = .1) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
theme_minimal() +
labs(y = "Relation Type",
x = "Absolute Error")
# Does relation type explain GPT-4's absolute error? Compare against an
# intercept-only model. The reference level of Relation is ANTONYMS
# (alphabetically first), so coefficients are differences from antonym pairs.
m = lm(data = df_merged, abs_diff ~ Relation)
m_reduced = lm(data = df_merged, abs_diff ~ 1)
anova(m_reduced, m)
## Analysis of Variance Table
##
## Model 1: abs_diff ~ 1
## Model 2: abs_diff ~ Relation
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 3499 5149.4
## 2 3495 4924.8 4 224.63 39.853 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m)
##
## Call:
## lm(formula = abs_diff ~ Relation, data = df_merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4220 -0.8890 -0.2409 0.6210 6.1210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.5920 0.1127 23.005 < 2e-16 ***
## RelationCOHYPONYMS -0.6565 0.1418 -4.629 3.81e-06 ***
## RelationHYPER/HYPONYMS -1.3211 0.1202 -10.988 < 2e-16 ***
## RelationNONE -1.0430 0.1156 -9.021 < 2e-16 ***
## RelationSYNONYMS -1.2767 0.1315 -9.711 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.187 on 3495 degrees of freedom
## Multiple R-squared: 0.04362, Adjusted R-squared: 0.04253
## F-statistic: 39.85 on 4 and 3495 DF, p-value: < 2.2e-16
# Inspect the 20 verb pairs with the largest absolute GPT-human discrepancy.
dftop20 = df_merged %>%
  arrange(desc(abs_diff)) %>%
  head(20)
# Fixed: this data frame has no `rating` column — the original call summarised
# a nonexistent column (warning: "Unknown or uninitialised column: `rating`")
# and returned NULL. The human score here is `Similarity`.
summary(dftop20$Similarity)
summary(dftop20$gpt4_rating)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 7.0 7.5 7.2 8.0 9.0
# As with SimLex, the largest discrepancies are mostly antonym/opposition
# pairs GPT-4 rates as similar. Note row 1 (object/disagree): gpt-4_response
# is NA but gpt4_rating is 0 — likely the parsing issue flagged at load time.
dftop20
## # A tibble: 20 × 9
## word1 word2 POS Similarity Relation gpt-4…¹ gpt4_…² diff abs_d…³
## <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 object disagree V 7.85 COHYPONYMS NA 0 -7.85 7.85
## 2 incline decline V 0.33 NONE 8 8 7.67 7.67
## 3 win defeat V 1.49 NONE 9 9 7.51 7.51
## 4 jerk prick V 0.5 NONE 8 8 7.5 7.5
## 5 multiply divide V 0.5 ANTONYMS 8 8 7.5 7.5
## 6 disallow allow V 0.5 ANTONYMS 8 8 7.5 7.5
## 7 subtract multiply V 0.83 COHYPONYMS 8 8 7.17 7.17
## 8 buy sell V 0 ANTONYMS 7 7 7 7
## 9 exhale inhale V 1 ANTONYMS 8 8 7 7
## 10 divide add V 0 COHYPONYMS 7 7 7 7
## 11 ask tell V 0.66 HYPER/HYPO… 7 7 6.34 6.34
## 12 kick punch V 1.66 COHYPONYMS 8 8 6.34 6.34
## 13 reap sow V 0.66 NONE 7 7 6.34 6.34
## 14 disappear reappear V 0.66 NONE 7 7 6.34 6.34
## 15 push tug V 0.66 SYNONYMS 7 7 6.34 6.34
## 16 die kill V 0.83 NONE 7 7 6.17 6.17
## 17 need want V 1.99 SYNONYMS 8 8 6.01 6.01
## 18 spring fall V 1 NONE 7 7 6 6
## 19 sell purchase V 2.16 ANTONYMS 8 8 5.84 5.84
## 20 please beg V 1.16 NONE 7 7 5.84 5.84
## # … with abbreviated variable names ¹`gpt-4_response`, ²gpt4_rating, ³abs_diff
# RAW-C: relatedness of ambiguous words across sentence contexts. Unlike the
# other datasets, this file already contains both the human relatedness norms
# and GPT-4's ratings, so no merge is needed.
df_gpt = read_csv("../../data/processed/raw-c/raw-c_gpt-4.csv")
## Rows: 672 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): word, sentence1, sentence2, ambiguity_type, disambiguating_word1, ...
## dbl (9): mean_relatedness, median_relatedness, diff, count, sd_relatedness,...
## lgl (1): same
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_gpt)
## [1] 672
### How correlated?
cor.test(df_gpt$mean_relatedness, df_gpt$gpt4_rating)
##
## Pearson's product-moment correlation
##
## data: df_gpt$mean_relatedness and df_gpt$gpt4_rating
## t = 35.17, df = 670, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7770950 0.8304386
## sample estimates:
## cor
## 0.8053915
cor.test(df_gpt$mean_relatedness, df_gpt$gpt4_rating, method = "spearman")
## Warning in cor.test.default(df_gpt$mean_relatedness, df_gpt$gpt4_rating, :
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_gpt$mean_relatedness and df_gpt$gpt4_rating
## S = 9333386, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8154629
# Reference value for human agreement on RAW-C (dashed line) — presumably the
# inter-annotator agreement reported for the dataset; confirm.
HUMAN_AGREEMENT_RAWC = 0.79
df_corr = df_gpt %>%
  summarise(r = cor(mean_relatedness, gpt4_rating, method = "spearman")) %>%
  mutate(Dimension = "Relatedness (RAW-C)")
df_corr %>%
  ggplot(aes(x = Dimension, y = r)) +
  geom_bar(stat = "identity", alpha = .7) +
  scale_y_continuous(limits = c(0, 1)) +
  # Fixed: removed a stray duplicated comma after the yintercept argument.
  geom_hline(yintercept = HUMAN_AGREEMENT_RAWC, linetype = "dashed",
             color = "steelblue3", size = 1) +
  labs(x = "", y = "Correlation") +
  theme_minimal()
# GPT-4 vs. human relatedness with a linear fit.
df_gpt %>%
ggplot(aes(x = gpt4_rating, y = mean_relatedness)) +
geom_point() +
geom_smooth(method = "lm") +
theme_minimal() +
labs(x = "GPT-4 Relatedness Judgment",
y = "Human Relatedness Judgment")
## `geom_smooth()` using formula 'y ~ x'
# Per-item error measures.
df_gpt = df_gpt %>%
mutate(diff = gpt4_rating - mean_relatedness,
abs_diff = abs(diff))
df_gpt %>%
ggplot(aes(x = abs_diff)) +
geom_histogram(alpha = .5, bins = 7)
# Absolute error by whether the two contexts use the same sense;
# pointranges show mean ± 2 SE.
df_gpt %>%
ggplot(aes(y = reorder(same, abs_diff), x = abs_diff)) +
geom_jitter(alpha = .1, width = .1) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
theme_minimal() +
labs(y = "Same Sense",
x = "Absolute Error")
# Distribution of absolute error by ambiguity type and sense match.
df_gpt %>%
  ggplot(aes(x = abs_diff,
             y = reorder(ambiguity_type, abs_diff),
             fill = same)) +
  geom_density_ridges2(aes(height = ..density..),
                       color = gray(0.25),
                       alpha = 0.5,
                       scale = 0.85,
                       size = .9,
                       stat = "density") +
  labs(x = "Absolute error",
       y = "Ambiguity Type",
       fill = "Same vs. Different Sense") +
  # Fixed: theme_minimal() is a *complete* theme and replaces all earlier
  # theme() settings. In the original it was added last, silently discarding
  # the legend position and the font-size adjustments; it now comes first.
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.title = element_text(size = rel(1.5)),
        axis.text = element_text(size = rel(1.5)),
        legend.text = element_text(size = rel(1.5)),
        legend.title = element_text(size = rel(1.5)),
        strip.text.x = element_text(size = rel(1.5)))
# Same-sense items have substantially lower absolute error than
# different-sense items.
summary(lm(data = df_gpt, abs_diff ~ same))
##
## Call:
## lm(formula = abs_diff ~ same, data = df_gpt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.02447 -0.39947 -0.02447 0.39220 1.88463
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.02447 0.02447 41.86 <2e-16 ***
## sameTRUE -0.53430 0.04239 -12.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.518 on 670 degrees of freedom
## Multiple R-squared: 0.1917, Adjusted R-squared: 0.1905
## F-statistic: 158.9 on 1 and 670 DF, p-value: < 2.2e-16
### Get residuals
# Sense match still predicts human relatedness after controlling for
# GPT-4's rating — GPT-4 underuses the same/different sense distinction.
mod = lm(data = df_gpt, mean_relatedness ~ gpt4_rating + same)
summary(mod)
##
## Call:
## lm(formula = mean_relatedness ~ gpt4_rating + same, data = df_gpt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.05139 -0.57085 -0.08017 0.53396 2.37686
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.70089 0.11365 -6.167 1.2e-09 ***
## gpt4_rating 0.94773 0.05038 18.812 < 2e-16 ***
## sameTRUE 0.86544 0.09322 9.283 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7803 on 669 degrees of freedom
## Multiple R-squared: 0.6888, Adjusted R-squared: 0.6878
## F-statistic: 740.2 on 2 and 669 DF, p-value: < 2.2e-16
# Inspect the 20 context pairs with the largest absolute discrepancy.
dftop20 = df_gpt %>%
  arrange(desc(abs_diff)) %>%
  head(20)
# Fixed: this data frame has no `rating` column — the original call summarised
# a nonexistent column (warning: "Unknown or uninitialised column: `rating`")
# and returned NULL. The human score here is `mean_relatedness`.
summary(dftop20$mean_relatedness)
summary(dftop20$gpt4_rating)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.0 2.0 2.5 2.5 3.0 3.0
# GPT-4's largest errors are different-sense pairs (homonyms) it rates as
# moderately related while humans rate them near zero.
dftop20 %>%
select(word, sentence1, sentence2, mean_relatedness, gpt4_rating)
## # A tibble: 20 × 5
## word sentence1 sentence2 mean_…¹ gpt4_…²
## <chr> <chr> <chr> <dbl> <dbl>
## 1 cape It was a red cape. It was a rocky cape. 0.0909 3
## 2 punch He had the alcoholic punch. He had the strongest pu… 0.429 3
## 3 toast They toasted the strudel. They toasted the host. 0.462 3
## 4 perch It was a tasty perch. It was a secure perch. 0.6 3
## 5 cross He crossed a room. He crossed a friend. 0.643 3
## 6 cross He crossed a road. He crossed an enemy. 0.727 3
## 7 panel It was a control panel. It was an advisory pane… 0.8 3
## 8 bat He saw a furry bat. He saw a wooden bat. 0 2
## 9 call They called the police. They called the debt. 1 3
## 10 cape It was a flowing cape. It was a rocky cape. 1 3
## 11 fan They had an electric fan. They had an enthusiasti… 0 2
## 12 orange It was a juicy orange. It was a reddish orange. 1 3
## 13 board It was the ironing board. It was the executive bo… 0.0667 2
## 14 degree It was about forty degrees. It was about associate … 0.0714 2
## 15 fan They had a ceiling fan. They had an enthusiasti… 0.0714 2
## 16 pitcher He saw the fast pitcher. He saw the glass pitche… 0.0714 2
## 17 pupil She had an interested pupil. She had a dilated pupil. 0.0714 2
## 18 degree It was about fifty degrees. It was about associate … 0.0769 2
## 19 file She had a data file. She had a nail file. 0.0833 2
## 20 band It was a rubber band. It was a country band. 0.1 2
## # … with abbreviated variable names ¹mean_relatedness, ²gpt4_rating
# Sensorimotor (perception) norms in sentence context: GPT-4's strength
# ratings per perceptual modality, and the human means/SDs per item.
df_gpt = read_csv("../../data/processed/cs_norms_perception/cs_norms_perception_gpt-4.csv")
## Rows: 448 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, sentence
## dbl (6): Hearing, Interoception, Olfaction, Taste, Touch, Vision
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_gpt)
## [1] 448
df_human = read_csv("../../data/raw/cs_norms_perception/cs_norms_perception.csv")
## Rows: 448 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): word, sentence, context
## dbl (13): Vision.M, Vision.SD, Hearing.M, Hearing.SD, Olfaction.M, Olfaction...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_human)
## [1] 448
# Merge on the word + sentence context. Keys are explicit so the join cannot
# silently change if either file gains shared columns.
df_merged = df_human %>%
  inner_join(df_gpt, by = c("word", "sentence"))
nrow(df_merged)
## [1] 448
### How correlated?
# Spearman correlation between human mean strength and GPT-4's strength,
# computed separately for each perceptual modality.
df_summ = df_merged %>%
summarise(Vision = cor(Vision.M, Vision, method = "spearman"),
Hearing = cor(Hearing.M, Hearing, method = "spearman"),
Touch = cor(Touch.M, Touch, method = "spearman"),
Olfaction = cor(Olfaction.M, Olfaction, method = "spearman"),
Taste = cor(Taste.M, Taste, method = "spearman"),
Interoception = cor(Interoception.M, Interoception, method = "spearman"))
df_summ
## # A tibble: 1 × 6
## Vision Hearing Touch Olfaction Taste Interoception
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.663 0.662 0.750 0.708 0.626 0.545
df_long = df_summ %>%
pivot_longer(everything(), names_to = "Factor", values_to = "Correlation")
df_long %>%
ggplot(aes(x = reorder(Factor, Correlation), y = Correlation)) +
geom_bar(stat = "identity") +
labs(x = "", y = "Correlation") +
scale_y_continuous(limits = c(0,1)) +
theme_minimal()
### calculate overall correlation
# Reshape both sources to long format (one row per item × modality) so a
# single overall correlation can be computed. The ".M" suffix is stripped
# from the human column names so Dimension values match across sources.
df_human_long = df_human %>%
select(word, sentence, Vision.M,
Hearing.M,
Touch.M,
Olfaction.M,
Taste.M,
Interoception.M) %>%
pivot_longer(cols = c(Vision.M,
Hearing.M,
Touch.M,
Olfaction.M,
Taste.M,
Interoception.M),
names_to = "Dimension",
values_to = "Strength_human") %>%
mutate(Dimension = str_remove(Dimension, "\\.M$"))
df_gpt_long = df_gpt %>%
select(word, sentence,Vision, Hearing,Touch,Olfaction,Taste,Interoception) %>%
pivot_longer(cols = c(Vision, Hearing,Touch,Olfaction,Taste,Interoception),
names_to = "Dimension",
values_to = "Strength_GPT")
# Fixed: dplyr joins take `by =`, not `on =` (pandas syntax). The original
# `on = c(word, sentence)` was silently swallowed by `...` and the call fell
# back to a natural join; the actual keys used are now stated explicitly.
df_merged_long = df_human_long %>%
  inner_join(df_gpt_long, by = c("word", "sentence", "Dimension"))
# Overall correlation pooled across all modalities (448 items × 6 dimensions
# = 2688 observations).
cor.test(df_merged_long$Strength_GPT, df_merged_long$Strength_human)
##
## Pearson's product-moment correlation
##
## data: df_merged_long$Strength_GPT and df_merged_long$Strength_human
## t = 81.152, df = 2686, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8314800 0.8534075
## sample estimates:
## cor
## 0.8427931
cor.test(df_merged_long$Strength_GPT, df_merged_long$Strength_human, method = "spearman")
## Warning in cor.test.default(df_merged_long$Strength_GPT,
## df_merged_long$Strength_human, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_merged_long$Strength_GPT and df_merged_long$Strength_human
## S = 528885361, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.8366102
# Scatter of GPT vs. human strength, faceted by modality.
df_merged_long %>%
ggplot(aes(x = Strength_GPT,
y = Strength_human)) +
geom_point(alpha = .5) +
geom_smooth(method = "lm") +
labs(x = "Strength (GPT rating)",
y = "Strength (Human rating)") +
facet_wrap(~Dimension) +
theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
# Reference value for human agreement on the perception norms (dashed line)
# — presumably the inter-annotator agreement reported for the norms; confirm.
HUMAN_AGREEMENT_CSP = 0.64
df_corr = data.frame(r = cor(df_merged_long$Strength_human,
                             df_merged_long$Strength_GPT, method = "spearman"),
                     dimension = "Perception Norms")
df_corr %>%
  ggplot(aes(x = dimension, y = r)) +
  geom_bar(stat = "identity", alpha = .7) +
  scale_y_continuous(limits = c(0, 1)) +
  # Fixed: removed a stray duplicated comma after the yintercept argument.
  geom_hline(yintercept = HUMAN_AGREEMENT_CSP, linetype = "dashed",
             color = "steelblue3", size = 1) +
  labs(x = "", y = "Correlation") +
  theme_minimal()
# Action (effector) norms in sentence context: GPT-4's strength ratings per
# body-part effector, and the human means/SDs per item.
df_gpt = read_csv("../../data/processed/cs_norms_action/cs_norms_action_gpt-4.csv")
## Rows: 448 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, sentence
## dbl (5): Foot_leg, Hand_arm, Head, Mouth_throat, Torso
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_gpt)
## [1] 448
df_human = read_csv("../../data/raw/cs_norms_action/cs_norms_action.csv")
## Rows: 448 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): word, sentence, context
## dbl (11): Foot_leg.M, Foot_leg.SD, Mouth_throat.M, Mouth_throat.SD, Torso.M,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_human)
## [1] 448
# Merge on the word + sentence context. Keys are explicit so the join cannot
# silently change if either file gains shared columns.
df_merged = df_human %>%
  inner_join(df_gpt, by = c("word", "sentence"))
nrow(df_merged)
## [1] 448
### How correlated?
# Spearman correlation between human mean strength and GPT-4's strength,
# computed separately for each effector.
df_summ = df_merged %>%
summarise(Hand_arm = cor(Hand_arm.M, Hand_arm, method = "spearman"),
Foot_leg = cor(Foot_leg.M, Foot_leg, method = "spearman"),
Head = cor(Head.M, Head, method = "spearman"),
Torso = cor(Torso.M, Torso, method = "spearman"),
Mouth_throat = cor(Mouth_throat.M, Mouth_throat, method = "spearman"))
df_summ
## # A tibble: 1 × 5
## Hand_arm Foot_leg Head Torso Mouth_throat
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.640 0.564 0.447 0.584 0.558
df_long = df_summ %>%
pivot_longer(everything(), names_to = "Factor", values_to = "Correlation")
df_long %>%
ggplot(aes(x = reorder(Factor, Correlation), y = Correlation)) +
geom_bar(stat = "identity") +
labs(x = "Factor", y = "Correlation") +
theme_minimal()
### calculate overall correlation
# Reshape both sources to long format (one row per item × effector); the
# ".M" suffix is stripped so Dimension values match across sources.
df_human_long = df_human %>%
select(word, sentence, Mouth_throat.M,
Foot_leg.M, Hand_arm.M, Torso.M, Head.M) %>%
pivot_longer(cols = c(Mouth_throat.M, Foot_leg.M, Hand_arm.M, Torso.M, Head.M),
names_to = "Dimension",
values_to = "Strength_human") %>%
mutate(Dimension = str_remove(Dimension, "\\.M$"))
df_gpt_long = df_gpt %>%
select(word, sentence, Mouth_throat, Foot_leg, Hand_arm, Torso, Head) %>%
pivot_longer(cols = c(Mouth_throat, Foot_leg, Hand_arm, Torso, Head),
names_to = "Dimension",
values_to = "Strength_GPT")
# Fixed: dplyr joins take `by =`, not `on =` (pandas syntax). The original
# `on = c(word, sentence)` was silently swallowed by `...` and the call fell
# back to a natural join; the actual keys used are now stated explicitly.
df_merged_long = df_human_long %>%
  inner_join(df_gpt_long, by = c("word", "sentence", "Dimension"))
# Overall correlation pooled across all effectors (448 items × 5 dimensions
# = 2240 observations).
cor.test(df_merged_long$Strength_GPT, df_merged_long$Strength_human)
##
## Pearson's product-moment correlation
##
## data: df_merged_long$Strength_GPT and df_merged_long$Strength_human
## t = 45.784, df = 2238, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6734235 0.7162299
## sample estimates:
## cor
## 0.6954431
cor.test(df_merged_long$Strength_GPT, df_merged_long$Strength_human, method = "spearman")
## Warning in cor.test.default(df_merged_long$Strength_GPT,
## df_merged_long$Strength_human, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_merged_long$Strength_GPT and df_merged_long$Strength_human
## S = 679132929, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6374549
# Scatter of GPT vs. human strength, faceted by effector.
df_merged_long %>%
ggplot(aes(x = Strength_GPT,
y = Strength_human)) +
geom_point(alpha = .5) +
geom_smooth(method = "lm") +
labs(x = "Strength (GPT rating)",
y = "Strength (Human rating)") +
facet_wrap(~Dimension) +
theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
### Benchmark: estimated human inter-rater agreement for the action norms,
### drawn as a reference line on the bar chart below.
HUMAN_AGREEMENT_ACTION = 0.5
### Overall Spearman correlation (collapsed across dimensions).
df_corr = data.frame(r = cor(df_merged_long$Strength_GPT,
                             df_merged_long$Strength_human,
                             method = "spearman"),
                     dimension = "Action Norms")
### Single bar against the human-agreement benchmark.
### Fixes: removed a stray double comma in the geom_hline() call (an empty
### positional argument); geom_col() replaces geom_bar(stat = "identity").
df_corr %>%
  ggplot(aes(x = dimension, y = r)) +
  geom_col(alpha = .7) +
  scale_y_continuous(limits = c(0, 1)) +
  geom_hline(yintercept = HUMAN_AGREEMENT_ACTION, linetype = "dashed",
             color = "steelblue3", size = 1) +
  labs(x = "", y = "Correlation") +
  theme_minimal()
### Load GPT-4's ratings for the Glasgow norms: one row per word, one
### column per norm (AoA, Arousal, Concreteness, ...).
df_gpt = read_csv("../../data/processed/glasgow/glasgow_gpt-4.csv")
## Rows: 871 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): word
## dbl (9): AoA, Arousal, Concreteness, Dominance, Familiarity, Gender, Imageab...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_gpt)
## [1] 871
### Recode AoA: GPT rated age of acquisition in years, while the Glasgow
### human norms use a 7-point scale of 2-year bins — bin GPT's ratings to
### match. Assigning case_when() directly to AoA avoids leaving a stray
### AoA2 temp column behind in df_gpt (same resulting AoA values).
### NOTE(review): values strictly between 12 and 13 match no branch and
### become NA (later removed by drop_na()) — confirm GPT's AoA ratings are
### integer-valued.
df_gpt = df_gpt %>%
  mutate(AoA = case_when(
    AoA <= 2 ~ 1,
    AoA <= 4 ~ 2,
    AoA <= 6 ~ 3,
    AoA <= 8 ~ 4,
    AoA <= 10 ~ 5,
    AoA <= 12 ~ 6,
    AoA >= 13 ~ 7
  ))
### Load the human Glasgow norms (means, SDs, and Ns per norm).
df_human = read_csv("../../data/raw/glasgow/glasgow.csv")
## Rows: 871 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): word
## dbl (28): Length, Arousal.M, Arousal.SD, Arousal.N, Valence.M, Valence.SD, V...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_human)
## [1] 871
### Merge on the shared word column (explicit `by` instead of relying on the
### natural-join message) and drop the one word with a missing rating.
df_merged = df_human %>%
  inner_join(df_gpt, by = "word") %>%
  drop_na()
nrow(df_merged)
## [1] 870
### How correlated?
### Spearman correlation between the human mean (.M) and GPT rating for
### each Glasgow norm; the small helper keeps the nine calls readable.
spearman_r <- function(human, gpt) cor(human, gpt, method = "spearman")
df_summ = df_merged %>%
  summarise(Valence = spearman_r(Valence.M, Valence),
            Arousal = spearman_r(Arousal.M, Arousal),
            Concreteness = spearman_r(Concreteness.M, Concreteness),
            Familiarity = spearman_r(Familiarity.M, Familiarity),
            Imageability = spearman_r(Imageability.M, Imageability),
            Dominance = spearman_r(Dominance.M, Dominance),
            AoA = spearman_r(AoA.M, AoA),
            Size = spearman_r(Size.M, Size),
            Gender = spearman_r(Gender.M, Gender))
df_summ
## # A tibble: 1 × 9
## Valence Arousal Concreteness Familiarity Imageabi…¹ Domin…² AoA Size Gender
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.756 0.661 0.814 0.707 0.743 0.385 0.717 0.686 0.471
## # … with abbreviated variable names ¹Imageability, ²Dominance
### Reshape the one-row summary to long format and plot a horizontal bar
### per norm, ordered by correlation. geom_col() is the idiomatic shortcut
### for geom_bar(stat = "identity").
df_long = df_summ %>%
  pivot_longer(everything(), names_to = "Factor", values_to = "Correlation")
df_long %>%
  ggplot(aes(y = reorder(Factor, Correlation), x = Correlation)) +
  geom_col() +
  labs(y = "Factor", x = "Correlation") +
  theme_minimal()
### calculate overall correlation
### Human Glasgow norms to long format: one row per (word, norm), with the
### ".M" suffix stripped so Dimension matches the GPT column names.
glasgow_cols_m <- c("Valence.M", "Arousal.M", "Concreteness.M",
                    "Familiarity.M", "Imageability.M", "Dominance.M",
                    "AoA.M", "Size.M", "Gender.M")
df_human_long = df_human %>%
  select(word, all_of(glasgow_cols_m)) %>%
  pivot_longer(cols = all_of(glasgow_cols_m),
               names_to = "Dimension",
               values_to = "Rating_human") %>%
  mutate(Dimension = str_remove(Dimension, "\\.M$"))
### GPT Glasgow ratings to the matching long format.
glasgow_cols <- c("Valence", "Arousal", "Concreteness", "Familiarity",
                  "Imageability", "Dominance", "AoA", "Size", "Gender")
df_gpt_long = df_gpt %>%
  select(word, all_of(glasgow_cols)) %>%
  pivot_longer(cols = all_of(glasgow_cols),
               names_to = "Dimension",
               values_to = "Rating_GPT")
### Merge human and GPT long tables. NOTE: `on =` is not a dplyr join
### argument — it was silently absorbed by `...` and the join fell back to
### all shared columns. The keys (which the fallback actually used, per the
### recorded join message) are now explicit with `by =`. drop_na() removes
### pairs where either rating is missing.
df_merged_long = df_human_long %>%
  inner_join(df_gpt_long, by = c("word", "Dimension")) %>%
  drop_na()
### Overall Pearson correlation between GPT and human Glasgow ratings,
### collapsed across all nine norms (r ≈ .71).
cor.test(df_merged_long$Rating_GPT, df_merged_long$Rating_human)
##
## Pearson's product-moment correlation
##
## data: df_merged_long$Rating_GPT and df_merged_long$Rating_human
## t = 88.884, df = 7835, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6973734 0.7194273
## sample estimates:
## cor
## 0.7085733
### Spearman (rank) correlation as a robustness check (rho ≈ .69).
### The tie warning is expected: ratings take repeated discrete values, so
### an exact p-value cannot be computed.
cor.test(df_merged_long$Rating_GPT, df_merged_long$Rating_human, method = "spearman")
## Warning in cor.test.default(df_merged_long$Rating_GPT,
## df_merged_long$Rating_human, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_merged_long$Rating_GPT and df_merged_long$Rating_human
## S = 2.5043e+10, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.687827
### Scatter of GPT vs. human ratings, one panel per Glasgow norm, with a
### per-panel linear fit.
df_merged_long %>%
  ggplot(aes(x = Rating_GPT, y = Rating_human)) +
  geom_point(alpha = .5) +
  geom_smooth(method = "lm") +
  facet_wrap(~Dimension) +
  labs(x = "Rating (GPT)",
       y = "Rating (Human)") +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
### Overall Spearman correlation across all Glasgow norms, as a single bar.
### geom_col() replaces geom_bar(stat = "identity").
df_corr = data.frame(r = cor(df_merged_long$Rating_human,
                             df_merged_long$Rating_GPT,
                             method = "spearman"),
                     dimension = "Glasgow Norms")
df_corr %>%
  ggplot(aes(x = dimension, y = r)) +
  geom_col(alpha = .7) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "", y = "Correlation") +
  theme_minimal()